######################
#Replication Materials
######################
#######################################
#Quasi-Poisson Models with Simulations - Figure 2
#######################################

rm(list = ls())
setwd() #Set Working Directory

# Packages
pacman::p_load(rtweet,
               ggplot2,
               purrrlyr,
               dplyr,
               readxl,
               writexl,
               broom,
               tidyverse,
               dotwhisker,
               lubridate,
               stargazer,
               ggpubr, 
               jtools,
               rpart,
               caret,
               reshape2,
               ipred,
               RColorBrewer,
               rpart.plot,
               MASS,
               randomForest,
               dplyr,
               car,
               vtable,
               effects,
               interactions,
               doParallel,
               foreach
               
)

set.seed(0611)
#Import data
df <- readRDS("df.RDS")

#Subtract 1 b/c as.numeric converts factor var from 0 -> 1 and 1 -> 2
###Need to account for that by subtracting one
#df$in.memoriam <- as.numeric(df$in.memoriam)-1
#df$news.coverage <- as.numeric(df$news.coverage)-1
#df$anti.conspir <- as.numeric(df$anti.conspir)-1
#df$month <- as.numeric(df$month)

##########
##########
#QP Models
##########
##########

##########
#Likes
##########
m.conspir.po.fav <- glm(favorite_count ~ conspir.po + log_follow + 
                          log_friends + verified + month, 
                        family = "quasipoisson",
                        data = df)
m.conspir.tusk.fav <- glm(favorite_count ~ conspir.tusk + log_follow + 
                            log_friends + verified + month, 
                          family = "quasipoisson",
                          data = df)


m.conspir.collusion.fav <- glm(favorite_count ~ conspir.collusion + log_follow + 
                                 log_friends + verified + month, 
                               family = "quasipoisson",
                               data = df)

# Repeat for stargazer - not taking longer names for importing models 
m1 <- glm(favorite_count ~ conspir.po + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)
m2 <- glm(favorite_count ~ conspir.tusk + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

m3 <- glm(favorite_count ~ conspir.collusion + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

stargazer(m1, m2, m3, 
          header = FALSE,
          align = TRUE,
          type = "text",
          # out = "Figures for Paper/app_fav_conspir_quasi.htm",
          label = "tab:app_fav_conspir_quasi",
          column.sep.width = "-15pt",
          title = "Tweet Characteristics and Frequency of Likes- Quasi-Poisson Models",
          dep.var.caption = c("Number of Likes"),
          model.names = FALSE,
          omit = c("month"), 
          covariate.labels = c("PO Consp.",
                               "Tusk Consp.",
                               
                               "Russia and PO Consp.",
                               
                               "Log(N Followers)",
                               "Log(N Friends)",
                               "Verified",
                               "Constant"),
          notes.append = FALSE,
          notes = c("Standard Errors in parenthesis.", 
                    "*p $<$ 0.1; **p $<$ .05, ***p $<$ .01 (two-tailed tests)."))


##########
#Retweets
##########
m.conspir.po.rt <- glm(retweet_count ~ conspir.po + log_follow + 
                         log_friends + verified + month, 
                       family = "quasipoisson",
                       data = df)

m.conspir.tusk.rt <- glm(retweet_count ~ conspir.tusk + log_follow + 
                           log_friends + verified + month, 
                         family = "quasipoisson",
                         data = df)


m.conspir.collusion.rt <- glm(retweet_count ~ conspir.collusion + log_follow + 
                                log_friends + verified + month, 
                              family = "quasipoisson",
                              data = df)
#repeat for stargazer
m4 <- glm(retweet_count ~ conspir.po + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

m5 <- glm(retweet_count ~ conspir.tusk + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

m6 <- glm(retweet_count ~ conspir.collusion + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

stargazer(m4, m5, m6, 
          header = FALSE,
          align = TRUE,
          type = "latex",
          #out = "Figures for Paper/app_fav_conspir_quasi.htm",
          label = "tab:app_rt_conspir_quasi",
          #column.sep.width = "-15pt",
          title = "Tweet Characteristics and Frequency of Retweets- Quasi-Poisson Models",
          dep.var.caption = c("Number of Retweets"),
          model.names = FALSE,
          omit = c("month"), #so it's prettier
          covariate.labels = c("PO Consp.",
                               "Tusk Consp.",
                               "Russia Consp.",
                               "Russia and PO Consp.",
                               
                               "Log(N Followers)",
                               "Log(N Friends)",
                               "Verified",
                               "Constant"),
          notes.append = FALSE,
          notes = c("Standard Errors in parenthesis.", 
                    "*p $<$ 0.1; **p $<$ .05, ***p $<$ .01 (two-tailed tests)."))


###############
#Simulating CIs
###############
#Create function for bootstrapping
boot_pi <- function(model, pdata, n, p) {
  odata <- model$data
  lp <- (1 - p) / 2
  up <- 1 - lp
  set.seed(2016)
  seeds <- round(runif(n, 1, 1000), 0)
  boot_y <- foreach(i = 1:n, .combine = rbind) %dopar% {
    set.seed(seeds[i])
    bdata <- odata[sample(seq(nrow(odata)), size = nrow(odata), replace = TRUE), ]
    bpred <- predict(update(model, data = bdata), type = "response", newdata = pdata)
    rpois(length(bpred), lambda = bpred)
  }
  boot_ci <- t(apply(boot_y, 2, quantile, c(lp, up)))
  return(data.frame(pred = predict(model, newdata = pdata, type = "response"), lower = boot_ci[, 1], upper = boot_ci[, 2]))
}

###
#PO
###
sim.df <- data.frame(conspir.po = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


preds <-  predict.glm(m.conspir.po.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.po.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% slice_rows("conspir.po") %>%
                          dmap(mean))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
po.fav.diff <- c(preds3$pred2[2] - preds3$pred2[1], 
                 preds3$upper2[2] - preds3$upper2[1], 
                 preds3$lower2[2] - preds3$lower2[1])

#####
#Tusk
#####
sim.df <- data.frame(conspir.tusk = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


preds <-  predict.glm(m.conspir.tusk.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.tusk.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% slice_rows("conspir.tusk") %>%
                          dmap(mean))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Difference
tusk.fav.diff <- c(preds3$pred2[2] - preds3$pred2[1], 
                   preds3$upper2[2] - preds3$upper2[1], 
                   preds3$lower2[2] - preds3$lower2[1])

##########
#Collusion
##########
sim.df <- data.frame(conspir.collusion = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


preds <-  predict.glm(m.conspir.collusion.fav, sim.df, type = "response", se.fit = F)

preds2 <- cbind(sim.df, boot_pi(m.conspir.collusion.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% slice_rows("conspir.collusion") %>%
                          dmap(mean))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Diff
collusion.fav.diff <- c(preds3$pred2[2] - preds3$pred2[1], 
                        preds3$upper2[2] - preds3$upper2[1], 
                        preds3$lower2[2] - preds3$lower2[1])


#############
#Combine Favs
#############
pred_diffs <- as.data.frame(rbind(po.fav.diff, tusk.fav.diff, collusion.fav.diff))
colnames(pred_diffs) <- c("estimate", "UCI", "LCI")
pred_diffs$model <- c("PO", "Tusk", "Collusion")

pred_diffs_fav <- pred_diffs

###############
#Retweet Models
###############

###
#PO
###
sim.df <- data.frame(conspir.po = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


preds <-  predict.glm(m.conspir.po.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.po.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% slice_rows("conspir.po") %>%
                          dmap(mean))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
po.rt.diff <- c(preds3$pred2[2] - preds3$pred2[1], 
                preds3$upper2[2] - preds3$upper2[1], 
                preds3$lower2[2] - preds3$lower2[1])
#####
#Tusk
#####
sim.df <- data.frame(conspir.tusk = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


preds <-  predict.glm(m.conspir.tusk.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.tusk.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% slice_rows("conspir.tusk") %>%
                          dmap(mean))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Difference
tusk.rt.diff <- c(preds3$pred2[2] - preds3$pred2[1], 
                  preds3$upper2[2] - preds3$upper2[1], 
                  preds3$lower2[2] - preds3$lower2[1])

##########
#Collusion
##########
sim.df <- data.frame(conspir.collusion = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


preds <-  predict.glm(m.conspir.collusion.rt, sim.df, type = "response", se.fit = F)

preds2 <- cbind(sim.df, boot_pi(m.conspir.collusion.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% slice_rows("conspir.collusion") %>%
                          dmap(mean))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Diff
collusion.rt.diff <- c(preds3$pred2[2] - preds3$pred2[1], 
                       preds3$upper2[2] - preds3$upper2[1], 
                       preds3$lower2[2] - preds3$lower2[1])


#############
#Combine Rts
#############
pred_diffs <- as.data.frame(rbind(po.rt.diff, tusk.rt.diff, collusion.rt.diff))
colnames(pred_diffs) <- c("estimate", "UCI", "LCI")
pred_diffs$model <- c("PO", "Tusk", "Collusion")


rt_diffs <- pred_diffs


###### 
#### Plot together #####
######

rt_diffs$Measure <- c("Retweets")
pred_diffs_fav$Measure <- c("Likes")

rt_fav_diffs <- rbind(rt_diffs, pred_diffs_fav)
colnames(rt_fav_diffs) <- c("Estimate", "UCI", "LCI", "Model", "Measure")

rt_fav_diffs_plot <- ggplot(rt_fav_diffs, aes(x = Model, y = Estimate,
                                              shape = factor(Measure), color = factor(Measure))) + 
  geom_point(position=position_dodge(width=.5)) + 
  coord_flip() +
  geom_errorbar(aes(ymin = LCI, ymax = UCI), width=.1,
                position=position_dodge(width=.5)) +
  ylim(-10,25) +  
  theme_classic(base_size=20) +
  labs(y = "Difference in the Predicted Level \n of Engagement When Content Included",
       x = "Conspiracy Theory",
       shape = "Measure")+ 
  scale_color_grey(start = .2, end = .5, name = "Measure") + 
  geom_hline(aes(yintercept = 0), color = "blue", linetype = "dotted")

rt_fav_diffs_plot
